Detailed Survival analyis of the Survival lung data.

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)

Libraries

data(lung)
## Warning in data(lung): data set 'lung' not found
lung$inst <- NULL
lung$status <- lung$status - 1
lung <- lung[complete.cases(lung),]

pander::pander(table(lung$status))
0 1
47 121
pander::pander(summary(lung$time))
Min. 1st Qu. Median Mean 3rd Qu. Max.
5 175 268 310 416 1022

Exploring Raw Features with RRPlot

convar <- colnames(lung)[lapply(apply(lung,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(lung[,c("status",convar)],"status")
pander::pander(topvar)
age wt.loss
0.106 0.106
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(lung$status,lung[,topf]),
                              atProb=c(0.90),
                  timetoEvent=lung$time,
                  title=topf,
#                  plotRR=FALSE
                  )
  idx <- idx + 1
}

names(RRanalysis) <- topFive

Reporting the Metrics

ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(RRatios) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive

pander::pander(ROCAUC)
  est lower upper
age 0.586 0.489 0.683
wt.loss 0.558 0.459 0.656
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.558 0.558 0.504 0.614
wt.loss 0.511 0.510 0.451 0.571
pander::pander(RRatios)
  est lower upper
age 0.997 0.742 1.34
wt.loss 0.785 0.462 1.33
pander::pander(LogRangp)
age 0.704
wt.loss 0.358
pander::pander(Sensitivity)
  est lower upper
age 0.1157 0.0647 0.187
wt.loss 0.0496 0.0184 0.105
pander::pander(Specificity)
  est lower upper
age 0.894 0.769 0.965
wt.loss 0.894 0.769 0.965
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1],RRatios[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe","RR")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe RR
age 0.586 0.558 0.1157 0.894 0.997
wt.loss 0.558 0.511 0.0496 0.894 0.785

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=lung,NumberofRepeats = 10)

[+++++++++++++++++++++++++++]..

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
ph.ecog 4.32e-01 1.194 1.541 1.988 0.679 0.649
sex -4.59e-01 0.456 0.632 0.876 0.649 0.679
pat.karno -1.77e-03 0.997 0.998 1.000 0.506 0.720
ph.karno -4.06e-07 1.000 1.000 1.000 0.577 0.720
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
ph.ecog 0.601 0.601 0.620 0.600 0.0449 0.405
sex 0.601 0.620 0.601 0.600 0.0285 0.478
pat.karno 0.506 0.585 0.500 0.585 0.0292 0.342
ph.karno 0.577 0.570 0.500 0.570 0.0143 0.280
  z.IDI z.NRI Delta.AUC Frequency
ph.ecog 3.33 2.48 -0.02005 1.0
sex 2.76 2.85 -0.00167 1.0
pat.karno 2.44 2.24 0.08546 1.0
ph.karno 2.22 1.64 0.06998 0.7

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

timeinterval <- 2*mean(subset(lung,status==1)$time)

h0 <- sum(lung$status & lung$time <= timeinterval)
h0 <- h0/sum((lung$time > timeinterval) | (lung$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.85 578
index <- predict(ml,lung)

rdata <- cbind(lung$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=lung$time,
                     title="Raw Train: Lung Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.649 0.475 0.339 0.339 0.493
RR 1.280 1.789 60.500 60.500 1.312
SEN 0.306 0.843 1.000 1.000 0.636
SPE 0.872 0.489 0.170 0.170 0.596
BACC 0.589 0.666 0.585 0.585 0.616
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.65 1.37 1.97 3.16e-07
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.23 1.23 1.19 1.27
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.21 1.21 1.2 1.22
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.652 0.65 0.585 0.709
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.69 0.597 0.783
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.306 0.225 0.396
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.872 0.743 0.952
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_0.5
0.649 0.475 0.339 0.339 0.5
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.28 1.08 1.52
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 8.009591 on 1 degrees of freedom, p = 0.004653
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 125 84 96.5 1.61 8.01
class=1 43 37 24.5 6.34 8.01

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,lung,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
1.29 1.52 749

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(lung$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=lung$time,
                     title="Train: Lung",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.796 0.624 0.467 0.467 0.479
RR 1.280 1.789 60.500 60.500 2.784
SEN 0.306 0.843 1.000 1.000 0.959
SPE 0.872 0.489 0.170 0.170 0.277
BACC 0.589 0.666 0.585 0.585 0.618
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.2 1.73 0.000124
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.02 1.09
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.01 1.01 1 1.01
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.652 0.651 0.587 0.711
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.691 0.598 0.784
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.306 0.225 0.396
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.872 0.743 0.952
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_0.5
0.796 0.624 0.467 0.467 0.5
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.28 1.08 1.52
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 8.009591 on 1 degrees of freedom, p = 0.004653
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 125 84 96.5 1.61 8.01
class=1 43 37 24.5 6.34 8.01

Cross-Validation

rcv <- randomCV(theData=lung,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.95,
                repetitions=200,
                classSamplingType = "Pro"
         )

.[+++].[++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[+++]10 Tested: 72 Avg. Selected: 3.8 Min Tests: 1 Max Tests: 4 Mean Tests: 1.388889 . MAD: 0.4728702

.[++].[++++].[++].[+++].[++].[+++].[+].[+++].[+++].[++++]20 Tested: 118 Avg. Selected: 3.75 Min Tests: 1 Max Tests: 6 Mean Tests: 1.694915 . MAD: 0.474057

.[+++].[+++].[++].[++].[+++].[+++].[+++].[+++].[+++].[+++]30 Tested: 141 Avg. Selected: 3.8 Min Tests: 1 Max Tests: 8 Mean Tests: 2.12766 . MAD: 0.4730948

.[++-].[++].[+++].[+++].[++].[++-].[+++].[++].[++].[+++]40 Tested: 152 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 8 Mean Tests: 2.631579 . MAD: 0.4741662

.[+++].[+++].[++-].[+].[+++].[+++].[+++].[+++].[++].[+++]50 Tested: 157 Avg. Selected: 3.68 Min Tests: 1 Max Tests: 9 Mean Tests: 3.184713 . MAD: 0.4754415

.[++].[+++].[+++].[+++].[+++].[+++].[+++].[++].[++].[+++]60 Tested: 160 Avg. Selected: 3.683333 Min Tests: 1 Max Tests: 9 Mean Tests: 3.75 . MAD: 0.4776102

.[+++].[++].[+++].[++].[++++].[+++].[+++].[+++].[+++].[++]70 Tested: 165 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 11 Mean Tests: 4.242424 . MAD: 0.4780849

.[++].[+++].[+++].[+++].[+++].[++].[++].[+++].[+++].[+++]80 Tested: 167 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 12 Mean Tests: 4.790419 . MAD: 0.477782

.[+++].[+++].[++].[+++].[+++].[++].[+++].[+++].[+++].[++]90 Tested: 167 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 14 Mean Tests: 5.389222 . MAD: 0.4774375

.[+++].[++].[+++-].[++].[++].[++-].[+++].[++].[++].[+++]100 Tested: 168 Avg. Selected: 3.67 Min Tests: 1 Max Tests: 18 Mean Tests: 5.952381 . MAD: 0.4761966

.[+++].[+++].[++].[+++].[+++].[++].[+++].[+-].[+].[+]110 Tested: 168 Avg. Selected: 3.627273 Min Tests: 1 Max Tests: 19 Mean Tests: 6.547619 . MAD: 0.4758525

.[++].[+++].[+++].[+++].[++].[+++].[+++].[+++].[+++].[++++]120 Tested: 168 Avg. Selected: 3.65 Min Tests: 2 Max Tests: 20 Mean Tests: 7.142857 . MAD: 0.4756308

.[+++].[+++].[+++].[+++].[+++].[+++].[++].[+++].[+++].[+++]130 Tested: 168 Avg. Selected: 3.669231 Min Tests: 2 Max Tests: 20 Mean Tests: 7.738095 . MAD: 0.4757132

.[+++].[+++].[+++].[+++].[+++].[++++].[+++].[++++].[+++].[+++]140 Tested: 168 Avg. Selected: 3.707143 Min Tests: 2 Max Tests: 20 Mean Tests: 8.333333 . MAD: 0.4756489

.[+++].[+++].[+++].[+++].[++].[+++].[++].[++].[++].[++]150 Tested: 168 Avg. Selected: 3.693333 Min Tests: 2 Max Tests: 20 Mean Tests: 8.928571 . MAD: 0.4755712

.[++].[+++].[++].[+++].[++].[+++].[++].[++].[++].[+++]160 Tested: 168 Avg. Selected: 3.675 Min Tests: 3 Max Tests: 21 Mean Tests: 9.52381 . MAD: 0.4752559

.[+++].[++].[+++].[++].[+++].[++].[+++].[++].[+++].[+++]170 Tested: 168 Avg. Selected: 3.670588 Min Tests: 3 Max Tests: 21 Mean Tests: 10.11905 . MAD: 0.4753232

.[+++].[++].[+++].[+++].[++].[+++].[+].[+++].[+++].[+++]180 Tested: 168 Avg. Selected: 3.666667 Min Tests: 3 Max Tests: 22 Mean Tests: 10.71429 . MAD: 0.47569

.[+++].[+++].[++].[++++].[+++].[++].[+++].[++].[+++].[+++]190 Tested: 168 Avg. Selected: 3.673684 Min Tests: 4 Max Tests: 23 Mean Tests: 11.30952 . MAD: 0.4757949

.[+++].[++].[+++].[+++].[+++].[+++].[++++].[+++].[++].[+++]200 Tested: 168 Avg. Selected: 3.685 Min Tests: 4 Max Tests: 25 Mean Tests: 11.90476 . MAD: 0.4761283

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisTest <- RRPlot(rdatacv,atProb=c(0.90),
                     timetoEvent=times,
                     title="Test: Lung Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.803 0.610 0.610 0.4480 0.501
RR 1.232 2.958 2.958 7.2455 2.612
SEN 0.231 0.959 0.959 1.0000 0.959
SPE 0.894 0.298 0.298 0.0213 0.255
BACC 0.563 0.628 0.628 0.5106 0.607
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.2 1.73 0.000124
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.05 1.05 1.01 1.09
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.952 0.952 0.943 0.961
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.602 0.601 0.538 0.666
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.609 0.51 0.709
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.231 0.16 0.317
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_0.5
0.802 0.61 0.61 0.448 0.5
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.23 1.03 1.48
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 5.311664 on 1 degrees of freedom, p = 0.021183
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 135 93 102.1 0.817 5.31
class=1 33 28 18.9 4.421 5.31

Calibrating the test results

rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.85 1 754
timeinterval <- calprob$timeInterval;

rdata <- cbind(status,calprob$prob)


rrAnalysisTest <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=times,
                     title="Calibrated Test: Lung",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.803 0.610 0.610 0.4480 0.501
RR 1.232 2.958 2.958 7.2455 2.612
SEN 0.231 0.959 0.959 1.0000 0.959
SPE 0.894 0.298 0.298 0.0213 0.255
BACC 0.563 0.628 0.628 0.5106 0.607
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.45 1.21 1.74 9.57e-05
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.02 1.09
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.952 0.952 0.943 0.961
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.602 0.6 0.535 0.664
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.609 0.51 0.709
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.231 0.16 0.317
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.894 0.769 0.965
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_0.5
0.802 0.61 0.61 0.448 0.5
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.23 1.03 1.48
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 5.311664 on 1 degrees of freedom, p = 0.021183
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 135 93 102.1 0.817 5.31
class=1 33 28 18.9 4.421 5.31